# install.packages("tidyverse")
# install.packages("timetk")
library(tidyverse)
library(readr) #read_csv()
library(dplyr)
library(glmnet)
library(caret) # may not need anymore
library(timetk)
library(rsample)

1) Load Data

# Load data:
df <- read_csv("./data/walmart/Walmart.csv")
head(df)

Fix Datatypes, Sort Values

# Fixing datatypes:
# any(grepl("/", df$Date))
print(sapply(df, class))
       Store         Date Weekly_Sales Holiday_Flag  Temperature   Fuel_Price          CPI Unemployment 
   "numeric"  "character"    "numeric"    "numeric"    "numeric"    "numeric"    "numeric"    "numeric" 
df$Date <- gsub("/", "-", df$Date) # replace slashes with dashes, so all dates follow same format
df$Date <- as.Date(df$Date, format = "%d-%m-%Y") # NOTE european-like date: day/month/year
print(sapply(df, class))
       Store         Date Weekly_Sales Holiday_Flag  Temperature   Fuel_Price          CPI Unemployment 
   "numeric"       "Date"    "numeric"    "numeric"    "numeric"    "numeric"    "numeric"    "numeric" 
# Sort values:
df <- df[order(df$Date, decreasing = FALSE), ] # gets an ordering of rows based on date and then df[ix] to select that order
head(df)
print(dim(df))
[1] 6435    8
table(df$Date) # value_counts

2010-02-05 2010-02-12 2010-02-19 2010-02-26 2010-03-05 2010-03-12 2010-03-19 2010-03-26 2010-04-02 2010-04-09 2010-04-16 2010-04-23 2010-04-30 2010-05-07 2010-05-14 2010-05-21 2010-05-28 2010-06-04 
        45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45 
2010-06-11 2010-06-18 2010-06-25 2010-07-02 2010-07-09 2010-07-16 2010-07-23 2010-07-30 2010-08-06 2010-08-13 2010-08-20 2010-08-27 2010-09-03 2010-09-10 2010-09-17 2010-09-24 2010-10-01 2010-10-08 
        45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45 
2010-10-15 2010-10-22 2010-10-29 2010-11-05 2010-11-12 2010-11-19 2010-11-26 2010-12-03 2010-12-10 2010-12-17 2010-12-24 2010-12-31 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 
        45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45 
2011-02-18 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25 2011-04-01 2011-04-08 2011-04-15 2011-04-22 2011-04-29 2011-05-06 2011-05-13 2011-05-20 2011-05-27 2011-06-03 2011-06-10 2011-06-17 
        45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45 
2011-06-24 2011-07-01 2011-07-08 2011-07-15 2011-07-22 2011-07-29 2011-08-05 2011-08-12 2011-08-19 2011-08-26 2011-09-02 2011-09-09 2011-09-16 2011-09-23 2011-09-30 2011-10-07 2011-10-14 2011-10-21 
        45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45 
2011-10-28 2011-11-04 2011-11-11 2011-11-18 2011-11-25 2011-12-02 2011-12-09 2011-12-16 2011-12-23 2011-12-30 2012-01-06 2012-01-13 2012-01-20 2012-01-27 2012-02-03 2012-02-10 2012-02-17 2012-02-24 
        45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45 
2012-03-02 2012-03-09 2012-03-16 2012-03-23 2012-03-30 2012-04-06 2012-04-13 2012-04-20 2012-04-27 2012-05-04 2012-05-11 2012-05-18 2012-05-25 2012-06-01 2012-06-08 2012-06-15 2012-06-22 2012-06-29 
        45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45 
2012-07-06 2012-07-13 2012-07-20 2012-07-27 2012-08-03 2012-08-10 2012-08-17 2012-08-24 2012-08-31 2012-09-07 2012-09-14 2012-09-21 2012-09-28 2012-10-05 2012-10-12 2012-10-19 2012-10-26 
        45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45         45 

There are 45 stores, so it makes sense that there are 45 entries for each date. Each date represents one week of sales.

Normalize weekly sales within each store, so that no one store dominates, and weekly sales are comparable: i.e. (sales_store - mean(sales_store)) / std(sales_store) and can convert back later via (scaled_sales * store_std) + store_mean

df <- df %>%
  group_by(Store) %>%
  mutate(
    sales_mean = mean(Weekly_Sales, na.rm = TRUE),
    sales_std   = sd(Weekly_Sales, na.rm = TRUE),
    weekly_sales_scaled = (Weekly_Sales - sales_mean) / sales_std
  ) %>%
  ungroup()
head(df)
# Plot scaled weekly sales over time, one figure per store.
# Iterate over the actual store IDs rather than 1:length(unique(...)),
# which silently breaks if the IDs are not a contiguous 1..n sequence.
for (store_id in sort(unique(df$Store))) {
  p <- df %>%
    filter(Store == store_id) %>%
    ggplot(aes(x = Date, y = weekly_sales_scaled)) +
    geom_line(color = "steelblue") +
    labs(title = paste("Weekly Sales Over Time - Store", store_id),
         x = "Date",
         y = "Weekly Sales (scaled per store)")
  print(p)
}

NA
NA

2) Train/Test split on sorted data:

# (Can't randomize the data first, because it is time-series)
# Train/Test Split:
split_ix <- floor(.8*nrow(df))
train_df <- df[1:split_ix,]
test_df <- df[(split_ix+1):nrow(df),]

print(dim(train_df))
[1] 5148   11
print(dim(test_df))
[1] 1287   11
cat("train:", format(min(train_df$Date), "%Y-%m-%d"), " to ", format(max(train_df$Date), "%Y-%m-%d"),"\n")
train: 2010-02-05  to  2012-04-13 
cat("test:", format(min(test_df$Date), "%Y-%m-%d"), " to ", format(max(test_df$Date), "%Y-%m-%d"),"\n")
test: 2012-04-13  to  2012-10-26 
print(colnames(df))
 [1] "Store"               "Date"                "Weekly_Sales"        "Holiday_Flag"        "Temperature"         "Fuel_Price"          "CPI"                 "Unemployment"       
 [9] "sales_mean"          "sales_std"           "weekly_sales_scaled"
# Define design matrix pattern (to make it easier to form X's during cross val):
design_matrix_formula <- weekly_sales_scaled ~ . - Store - Date - Weekly_Sales - sales_mean - sales_std + 0 # 0 means no intercept, glmnet will add its own; here we drop columns

# model.matrix creates a design matrix from formula, auto-encodes categorical as dummy vars (if any--in this case, holiday is already a double.)
X_test <- model.matrix(design_matrix_formula, data=test_df)
y_test <- test_df$weekly_sales_scaled

print(dim(X_test))
[1] 1287    5
print(X_test[1:10,]) # see example of columns left for prediction
   Holiday_Flag Temperature Fuel_Price      CPI Unemployment
1             0       44.42      4.187 137.8680        8.150
2             0       45.68      4.044 214.3127        7.139
3             0       69.03      3.891 221.1484        6.891
4             0       49.89      4.025 141.8434        7.671
5             0       41.81      4.025 137.8680        4.125
6             0       48.65      4.187 137.8680        8.983
7             0       42.46      4.044 214.3127        7.139
8             0       36.90      4.025 137.8680        7.489
9             0       52.22      4.187 141.8434        8.253
10            0       64.28      4.254 131.1080       11.627

3) LASSO with Rolling Time-series cross validation

Rolling-origin CV keeps each validation window strictly in the future relative to its training window. Otherwise the model could use information from the future to predict the past (look-ahead leakage), inflating its apparent performance.

# NOTE - ctrl shift c to mass uncomment/comment out
# Attempt 1 - shorter
# cv_folds <- time_series_cv(
#   data = train_df,
#   date_var = Date,
#   cumulative = FALSE, # no growing window (same size folds)
#   initial = "6 months", # length of train window
#   assess = "3 months", # length of validation window
#   skip = "3 months", # jump between folds
#   slice_limit = 5 # num folds max
# )

# Attempt 2-- longer, cumulative windowing
cv_folds <- time_series_cv(
  data = train_df,
  date_var = Date,
  cumulative = TRUE, # growing window (same size folds)
  initial = "1 year", # length of train window
  assess = "3 months", # length of validation window
  skip = "3 months", # jump between folds
  slice_limit = 5 # num folds max
)

plot_time_series_cv_plan(cv_folds, .date_var = Date, .value = weekly_sales_scaled, .interactive=TRUE)
# === Define Lambda search grid ===
# Easy way to get lambda grid with good scaling--from the glmnet function itself.
# Note that we will not keep this model fit, this is solely for the purpose of getting
# a lambda grid for our manual CV.
unused_X_all <- model.matrix(design_matrix_formula, data=train_df)
unused_y_all <- train_df$weekly_sales_scaled
unused_model <- glmnet(unused_X_all, unused_y_all, alpha=1)
lambda_grid <- unused_model$lambda
lambda_grid
 [1] 0.1450961864 0.1322062411 0.1204614030 0.1097599440 0.1000091731 0.0911246338 0.0830293725 0.0756532718 0.0689324437 0.0628086754 0.0572289258 0.0521448657 0.0475124596 0.0432915836 0.0394456786
[16] 0.0359414333 0.0327484954 0.0298392093 0.0271883762 0.0247730358 0.0225722676 0.0205670095 0.0187398931 0.0170750926 0.0155581885 0.0141760419 0.0129166814 0.0117691990 0.0107236558 0.0097709958
[31] 0.0089029675 0.0081120524 0.0073914000 0.0067347684 0.0061364701 0.0055913230 0.0050946053 0.0046420146 0.0042296308 0.0038538821 0.0035115138 0.0031995606 0.0029153205 0.0026563314 0.0024203503
[46] 0.0022053330 0.0020094173 0.0018309062 0.0016682536 0.0015200505 0.0013850134 0.0012619726 0.0011498625 0.0010477119 0.0009546360 0.0008698288 0.0007925556 0.0007221471 0.0006579936 0.0005995392
[61] 0.0005462778 0.0004977480 0.0004535294 0.0004132391
n_folds <- length(cv_folds$splits)
n_lambdas <- length(lambda_grid)
cv_mses <- numeric(n_lambdas) # keep track of MSE across different lambdas
lambda_ix <- 1

for (lambda in lambda_grid) {
  fold_mses <- numeric(n_folds) # array for keeping track of mse across folds
  
  for (fold_ix in 1:n_folds) {
    # Get train, valid data for current cv fold:
    fold <- cv_folds$splits[[fold_ix]]
    train_data <- analysis(fold)
    valid_data <- assessment(fold)
    
    # Manipulate data into design matrices:
    X_train <- model.matrix(design_matrix_formula, data=train_data)
    y_train <- train_data$weekly_sales_scaled
    X_val <- model.matrix(design_matrix_formula, data=valid_data)
    y_val <- valid_data$weekly_sales_scaled
    
    # Fit LASSO (non-cv version):
    lasso_model <- glmnet(X_train, y_train, alpha=1, lambda=lambda)
    
    # Get val error:
    yhat_val <- predict(lasso_model, newx=X_val, s=lambda)
    fold_mses[fold_ix] <- mean((y_val - yhat_val)^2)
  }
  cv_mses[lambda_ix] <- mean(fold_mses)
  lambda_ix <- lambda_ix + 1
  
}

best_lambda <- lambda_grid[which.min(cv_mses)]
cat("best lambda: ", best_lambda, "with min mse ", min(cv_mses), "\n\n")
best lambda:  0.06893244 with min mse  1.000508 
plot(x=lambda_grid,
     y=cv_mses,
     main = paste0(n_folds, "-Fold Cross-Validation MSE for different Lambdas"),
     xlab = "Lambda",
     ylab = "Mean CV MSE")

#best_ix <- which.min(cv_mses)
#points(best_lambda, cv_mses[best_ix], col = "red", pch = 19, cex = 1.5)
#text(best_lambda, cv_mses[best_ix],
#     labels = paste("Best λ =", round(best_lambda, 4)),
#     pos = 4, col = "red")

4) Fit model with best lambda on full data, get test error:

X_train <- model.matrix(design_matrix_formula, data=train_df)
y_train <- train_df$weekly_sales_scaled
best_lasso_model <- glmnet(X_train, y_train, alpha=1, lambda=best_lambda)

 # Get val error:
yhat_test <- predict(best_lasso_model, newx=X_test, s=best_lambda)
mse_test <- mean((y_test - yhat_test)^2)
cat("Test MSE: ", mse_test, "\n\n")
Test MSE:  0.4069579 
coef(best_lasso_model)
6 x 1 sparse Matrix of class "dgCMatrix"
                       s0
(Intercept)   0.078758823
Holiday_Flag  0.262235923
Temperature  -0.001764706
Fuel_Price    .          
CPI           .          
Unemployment  .          

First CV param attempt notes: The above serves as a baseline, when using no time-averaging or time-lagged features. It could just predict the mean, not finding a strong signal with the other features. Next, we will need to try incorporating time. In the meantime, some things I will try are changing the folds and lambdas used.

second CV param attempt notes: Second attempt, used 1 year initial, and cumulative folds. Now holidays and temperature matter, which is better than the first attempt, which had 0’s for all features and intercept was the only kept feature. MSE stayed the same.

LASSO - baseline, NO lagged variables.

5) Plots to analyze the results

Lasso Path–shows variables entering and leaving as lambda (regularization) increases. (NOT cross val).

fit_path <- glmnet(X_train, y_train, alpha = 1)  # doesnt take best lambda, fits a bunch of lambdas, no CV
plot(fit_path, xvar = "lambda", label = TRUE)
title("LASSO Coefficient Paths")

beta_mat <- as.matrix(fit_path$beta)
#rownames(beta_mat)
variable_map <- data.frame(
  index = seq_len(nrow(beta_mat)),
  variable = rownames(beta_mat)
)

print(variable_map)
 # 1) Extract coefficients for each tested lambda value, and convert datatype to R matrix:
 Beta_lambda <- as.matrix(coef(fit_path, s=fit_path$lambda))
 
 # 2) Even tho I didn't specify intercept, there is a intercept row of 0's. 
#Beta_lambda <- Beta_lambda[-1,, drop=FALSE] # all rows except row 1, all columns; dont change dimensions
 # print(Beta_lambda)
 cat('Beta_lambda shape:', dim(Beta_lambda), '\n') # shape: (p x #lambdas
Beta_lambda shape: 6 64 
 
 # 3) Get L0, L1 norms:
 l1_norm = colSums(abs(Beta_lambda)) # sum up each betah_lambda col; colSums computes sum of each column
 l0_norm = colSums(Beta_lambda != 0) # number of nonzero coeffs for each lambda
 # 4) Want the plot's x axis to be increasing model complexity (incr L1 norm to the right)
 incr_order_ixs = order(l1_norm) # returns indices to make it incr order
 l1_norm_ordered = l1_norm[incr_order_ixs]
 l0_norm_ordered = l0_norm[incr_order_ixs]
 Beta_lambda_ordered = Beta_lambda[,incr_order_ixs,drop=FALSE]
 # 5) Plot coeff vs. L1 norm:
 # note: currently, Beta_lambda's columns are for each lambda, but matplot plots columns of y
 #       as separate lines. We want a line for each coeff (p). Beta_lambda is (p x #lambdas).
 #       so, we take the transpose of beta.
 #       also, num rows should match for x (#lambdas x 1) and y (p x #lambdas).
 cat('L1 norm shape:', length(l1_norm_ordered), '\n')
L1 norm shape: 64 
 cat('Beta_lambda shape:', dim(Beta_lambda_ordered), '\n')
Beta_lambda shape: 6 64 
 
 # BUG FIX: plot the ORDERED norms/coefficients computed above -- they were
# previously computed and then never used; the unordered versions were plotted.
matplot(x = l1_norm_ordered, y = t(Beta_lambda_ordered),
        type = "l", # line plot
        lwd = 2,    # line width
        lty = 1,    # solid lines
        xlab = expression("L1 Norm (" * "||" * hat(beta[lambda]) * "||"[1] * ")"),
        ylab = "Coefficients",
        main = "Lasso Path")
abline(h = 0, col = "black", lwd = 1, lty = 2) # show x axis more easily

---
title: "Lasso - Walmart"
output: html_notebook
---

```{r}
# install.packages("tidyverse")
# install.packages("timetk")
library(tidyverse)
library(readr) #read_csv()
library(dplyr)
library(glmnet)
library(caret) # may not need anymore
library(timetk)
library(rsample)
```


### 1) Load Data
```{r}
# Load data:
# Weekly sales per Walmart store plus covariates (holiday flag, temperature,
# fuel price, CPI, unemployment). One row per (store, week).
df <- read_csv("./data/walmart/Walmart.csv")
head(df)
```

**Fix Datatypes, Sort Values**
```{r}
# Fixing datatypes:
# any(grepl("/", df$Date))  # some rows use "/" separators, others "-"
print(sapply(df, class))
df$Date <- gsub("/", "-", df$Date) # replace slashes with dashes, so all dates follow same format
df$Date <- as.Date(df$Date, format = "%d-%m-%Y") # NOTE european-like date: day-month-year
# Guard: as.Date() silently returns NA for any row that does not match the
# format, which would corrupt the time ordering and the train/test split.
stopifnot(!anyNA(df$Date))
print(sapply(df, class))

# Sort values (ascending by date -- required before any time-based split/CV):
df <- df[order(df$Date, decreasing = FALSE), ] # gets an ordering of rows based on date and then df[ix] to select that order
head(df)
```
```{r}
print(dim(df)) # 6435 x 8 -- 45 stores x 143 weekly dates
table(df$Date) # value_counts: rows per date (expect exactly one per store)
```

**There are 45 stores, so it makes sense that there are 45 entries for each date. Each date represents one week of sales.**


**Normalize weekly sales within each store, so that no one store dominates, and weekly sales are comparable:**
i.e. (sales_store - mean(sales_store)) / std(sales_store)
and can convert back later via (scaled_sales * store_std) + store_mean
```{r}
# Per-store z-scaling. The store mean/sd are kept as columns so the scaled
# series can be inverted later: Weekly_Sales = scaled * sales_std + sales_mean.
df <- df %>%
  group_by(Store) %>%
  mutate(
    sales_mean = mean(Weekly_Sales, na.rm = TRUE),
    sales_std = sd(Weekly_Sales, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(weekly_sales_scaled = (Weekly_Sales - sales_mean) / sales_std)
head(df)
```

```{r}
# Plot scaled weekly sales over time, one figure per store.
# Iterate over the actual store IDs rather than 1:length(unique(...)),
# which silently breaks if the IDs are not a contiguous 1..n sequence.
for (store_id in sort(unique(df$Store))) {
  p <- df %>%
    filter(Store == store_id) %>%
    ggplot(aes(x = Date, y = weekly_sales_scaled)) +
    geom_line(color = "steelblue") +
    labs(title = paste("Weekly Sales Over Time - Store", store_id),
         x = "Date",
         y = "Weekly Sales (scaled per store)")
  print(p)
}


```

### 2) Train/Test split on sorted data:
```{r}
# (Can't randomize the data first, because it is time-series.)
# Train/Test Split:
# BUG FIX: a raw 80% row cut can split a single week's 45 store-rows across
# train and test -- the same date then appears in both sets (the printed
# ranges previously showed 2012-04-13 ending train AND starting test).
# Snap the cut to a date boundary so every row of a week lands on one side.
split_ix <- floor(0.8 * nrow(df))
split_date <- df$Date[split_ix]
split_ix <- max(which(df$Date == split_date)) # last row of the boundary week goes to train
train_df <- df[1:split_ix, ]
test_df <- df[(split_ix + 1):nrow(df), ]

print(dim(train_df))
print(dim(test_df))
cat("train:", format(min(train_df$Date), "%Y-%m-%d"), " to ", format(max(train_df$Date), "%Y-%m-%d"), "\n")
cat("test:", format(min(test_df$Date), "%Y-%m-%d"), " to ", format(max(test_df$Date), "%Y-%m-%d"), "\n")
print(colnames(df))

# Define design matrix pattern (to make it easier to form X's during cross val):
# "+ 0" drops the formula intercept column; glmnet adds its own intercept.
design_matrix_formula <- weekly_sales_scaled ~ . - Store - Date - Weekly_Sales - sales_mean - sales_std + 0

# model.matrix creates a design matrix from the formula and auto-encodes
# categorical columns as dummy vars (none here; Holiday_Flag is already numeric).
X_test <- model.matrix(design_matrix_formula, data = test_df)
y_test <- test_df$weekly_sales_scaled

print(dim(X_test))
print(X_test[1:10, ]) # see example of columns left for prediction
```

### 3) LASSO with Rolling Time-series cross validation
Rolling-origin CV keeps each validation window strictly in the future relative
to its training window. Otherwise the model could use information from the
future to predict the past (look-ahead leakage), inflating apparent performance.

```{r}
# NOTE - ctrl shift c to mass uncomment/comment out
# Attempt 1 - shorter
# cv_folds <- time_series_cv(
#   data = train_df,
#   date_var = Date,
#   cumulative = FALSE, # no growing window (same size folds)
#   initial = "6 months", # length of train window
#   assess = "3 months", # length of validation window
#   skip = "3 months", # jump between folds
#   slice_limit = 5 # num folds max
# )

# Attempt 2-- longer, cumulative windowing
# Rolling-origin CV: each fold trains on all data up to a cutoff and
# validates on the 3 months that follow, so validation is always strictly
# in the future relative to its training window (no leakage).
cv_folds <- time_series_cv(
  data = train_df,
  date_var = Date,
  cumulative = TRUE, # growing window: folds share a start date and end later and later
  initial = "1 year", # length of train window
  assess = "3 months", # length of validation window
  skip = "3 months", # jump between folds
  slice_limit = 5 # num folds max
)

plot_time_series_cv_plan(cv_folds, .date_var = Date, .value = weekly_sales_scaled, .interactive=TRUE)
```

```{r}
# === Define lambda search grid ===
# Let glmnet pick a well-scaled lambda sequence for us: fit once on the full
# training design matrix and keep only the lambda path. The fitted model
# itself is thrown away -- the manual CV below does the real fitting.
path_X <- model.matrix(design_matrix_formula, data = train_df)
path_y <- train_df$weekly_sales_scaled
path_fit <- glmnet(path_X, path_y, alpha = 1)
lambda_grid <- path_fit$lambda
lambda_grid
```


```{r}
# Cross-validate the LASSO over the whole lambda grid.
# IMPROVEMENT: the previous version refit glmnet once per (lambda, fold) pair
# and rebuilt the fold design matrices inside the lambda loop. Instead, fit
# the full regularization path ONCE per fold and evaluate every lambda from
# that single fit -- this is much faster and follows the glmnet docs, which
# warn against supplying a single lambda value to glmnet().
n_folds <- length(cv_folds$splits)
n_lambdas <- length(lambda_grid)
fold_mse_mat <- matrix(NA_real_, nrow = n_folds, ncol = n_lambdas)

for (fold_ix in seq_len(n_folds)) {
  # Get train/validation data for the current CV fold:
  fold <- cv_folds$splits[[fold_ix]]
  train_data <- analysis(fold)
  valid_data <- assessment(fold)

  # Build the fold's design matrices once:
  X_train <- model.matrix(design_matrix_formula, data = train_data)
  y_train <- train_data$weekly_sales_scaled
  X_val <- model.matrix(design_matrix_formula, data = valid_data)
  y_val <- valid_data$weekly_sales_scaled

  # Fit the entire LASSO path in one call:
  lasso_model <- glmnet(X_train, y_train, alpha = 1, lambda = lambda_grid)

  # Predict at every lambda at once; yhat_val is (n_val x n_lambdas) with
  # columns in the same order as lambda_grid.
  yhat_val <- predict(lasso_model, newx = X_val, s = lambda_grid)
  fold_mse_mat[fold_ix, ] <- colMeans((y_val - yhat_val)^2)
}

# Average validation MSE across folds, per lambda:
cv_mses <- colMeans(fold_mse_mat)

best_lambda <- lambda_grid[which.min(cv_mses)]
cat("best lambda: ", best_lambda, "with min mse ", min(cv_mses), "\n\n")
```

```{r}
# CV MSE as a function of lambda, with the selected lambda highlighted.
plot(x = lambda_grid,
     y = cv_mses,
     main = paste0(n_folds, "-Fold Cross-Validation MSE for different Lambdas"),
     xlab = "Lambda",
     ylab = "Mean CV MSE")
# Activate the previously commented-out annotation of the best lambda
# (ASCII "lambda" label to avoid encoding issues in rendered output):
best_ix <- which.min(cv_mses)
points(best_lambda, cv_mses[best_ix], col = "red", pch = 19, cex = 1.5)
text(best_lambda, cv_mses[best_ix],
     labels = paste("Best lambda =", round(best_lambda, 4)),
     pos = 4, col = "red")
```
### 4) Fit model with best lambda on full data, get test error:
```{r}
# Refit on the full training set at the CV-selected lambda:
X_train <- model.matrix(design_matrix_formula, data=train_df)
y_train <- train_df$weekly_sales_scaled
best_lasso_model <- glmnet(X_train, y_train, alpha=1, lambda=best_lambda)

 # Get test error (held-out, strictly-future window):
yhat_test <- predict(best_lasso_model, newx=X_test, s=best_lambda)
mse_test <- mean((y_test - yhat_test)^2)
cat("Test MSE: ", mse_test, "\n\n")
coef(best_lasso_model) # sparse coefficient vector at best_lambda
```

**First CV param attempt notes:**
The above serves as a baseline, when using no time-averaging or time-lagged features. It could just predict the mean, not finding a strong signal with the other features. Next, we will need to try incorporating time. In the meantime, some things I will try are changing the folds and lambdas used.

**second CV param attempt notes:**
Second attempt, used 1 year initial, and cumulative folds. Now holidays and temperature matter, which is better than the first attempt, which had 0's for all features and intercept was the only kept feature. MSE stayed the same.

LASSO - baseline, NO lagged variables.

### 5) Plots to analyze the results
**Lasso Path--shows variables entering and leaving as lambda (regularization) increases. (NOT cross val).**
```{r}
# Fit the full LASSO path (glmnet picks its own lambda sequence; no CV)
# and draw each coefficient's path against log(lambda).
fit_path <- glmnet(X_train, y_train, alpha = 1)
plot(fit_path, xvar = "lambda", label = TRUE)
title("LASSO Coefficient Paths")

# Map the numeric labels on the plot back to variable names.
# NOTE: fit_path$beta excludes the intercept row (TODO: include if needed).
beta_mat <- as.matrix(fit_path$beta)
variable_map <- data.frame(
  index = seq_len(nrow(beta_mat)),
  variable = rownames(beta_mat)
)
print(variable_map)
```
```{r}
 # 1) Extract coefficients for each tested lambda value as a plain matrix.
#    Rows = coefficients (incl. intercept), columns = lambda values.
Beta_lambda <- as.matrix(coef(fit_path, s = fit_path$lambda))

# 2) Even though the design matrix dropped the intercept, glmnet reports
#    an intercept row; it could be removed here if desired:
# Beta_lambda <- Beta_lambda[-1, , drop = FALSE]
cat("Beta_lambda shape:", dim(Beta_lambda), "\n") # shape: (p x #lambdas)

# 3) L1 and L0 norms of each column (one per lambda):
l1_norm <- colSums(abs(Beta_lambda))
l0_norm <- colSums(Beta_lambda != 0) # number of nonzero coefficients per lambda

# 4) Order columns by increasing L1 norm so the plot's x axis reads as
#    increasing model complexity:
incr_order_ixs <- order(l1_norm)
l1_norm_ordered <- l1_norm[incr_order_ixs]
l0_norm_ordered <- l0_norm[incr_order_ixs]
Beta_lambda_ordered <- Beta_lambda[, incr_order_ixs, drop = FALSE]

# 5) Plot coefficients vs. L1 norm.
#    matplot draws each COLUMN of y as a separate line, and Beta_lambda is
#    (p x #lambdas), so transpose it to get one line per coefficient;
#    nrow(t(Beta)) then matches length(x).
cat("L1 norm shape:", length(l1_norm_ordered), "\n")
cat("Beta_lambda shape:", dim(Beta_lambda_ordered), "\n")

# BUG FIX: plot the ORDERED data -- the ordered vectors computed above were
# previously unused and the unordered versions were plotted instead.
matplot(x = l1_norm_ordered, y = t(Beta_lambda_ordered),
        type = "l", # line plot
        lwd = 2,    # line width
        lty = 1,    # solid lines
        xlab = expression("L1 Norm (" * "||" * hat(beta[lambda]) * "||"[1] * ")"),
        ylab = "Coefficients",
        main = "Lasso Path")
abline(h = 0, col = "black", lwd = 1, lty = 2) # show x axis more easily
```

